import warnings
# Install a global "ignore" filter: no category or module is given, so the
# filter applies to every warning raised after this line.
# NOTE(review): this also hides deprecation notices from the libraries
# imported below - confirm that a blanket suppression is intended.
warnings.filterwarnings("ignore")
# Libraries for data manipulation and visualization
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the raw incident records from the Excel workbook
df_armed_conflict = pd.read_excel("Task 1 - Data Incident.xlsx")

# Work on a copy so the frame read from disk is left untouched
df = df_armed_conflict.copy()

# Keep only violence-against-civilians events recorded for Mali in 2021 or 2022
in_mali = df['country'] == 'Mali'
in_period = df['year'].isin([2021, 2022])
targets_civilians = df['event_type'] == 'Violence against civilians'
df = df[in_mali & in_period & targets_civilians]

# Preview the first rows of the filtered frame
df.head()
| data_id | iso | event_id_cnty | event_id_no_cnty | event_date | year | time_precision | event_type | sub_event_type | actor1 | ... | location | latitude | longitude | geo_precision | source | source_scale | notes | fatalities | timestamp | iso3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 37 | 9727585 | 466 | MLI7872 | 7872 | 2022-12-30 | 2022 | 1 | Violence against civilians | Attack | Military Forces of Mali (2021-) | ... | Sossobe Togoro | 14.5608 | -4.6746 | 1 | Twitter; Undisclosed Source | Local partner-Other | On 30 December 2022, FAMa and Wagner forces co... | 2 | 1673307074 | MLI |
| 38 | 9727643 | 466 | MLI7930 | 7930 | 2022-12-30 | 2022 | 1 | Violence against civilians | Attack | Unidentified Armed Group (Mali) | ... | Gao | 16.2717 | -0.0447 | 1 | Twitter; Undisclosed Source | Local partner-Other | On 30 December 2022, an unidentified armed gro... | 0 | 1673307074 | MLI |
| 39 | 9727644 | 466 | MLI7931 | 7931 | 2022-12-30 | 2022 | 1 | Violence against civilians | Attack | Unidentified Armed Group (Mali) | ... | Gao | 16.2717 | -0.0447 | 1 | Undisclosed Source | Local partner-Other | On 30 December 2022, an unidentified armed gro... | 0 | 1673307074 | MLI |
| 40 | 9727645 | 466 | MLI7932 | 7932 | 2022-12-30 | 2022 | 1 | Violence against civilians | Abduction/forced disappearance | Islamic State (Sahel) | ... | Anderamboukane | 15.4220 | 3.0224 | 1 | Undisclosed Source | Local partner-Other | On 30 December 2022, IS Sahel militants abduct... | 0 | 1673307074 | MLI |
| 66 | 9861783 | 466 | MLI7929 | 7929 | 2022-12-30 | 2022 | 1 | Violence against civilians | Attack | Dawsahak Ethnic Militia (Mali) | ... | Bagoundie | 16.2191 | -0.0193 | 2 | Twitter; Undisclosed Source | Local partner-Other | On 30 December 2022, Dawsahak gunmen opened fi... | 1 | 1677520657 | MLI |
5 rows × 31 columns
# (rows, columns) of the filtered frame
df.shape
(1128, 31)
# Distinct-value count of both identifier columns, computed in one expression.
# Written as two separate expression statements, only the last result is
# rendered by a notebook cell, so the `data_id` count was silently discarded.
# Comparing each count with the row count shows whether the column is unique.
df[["data_id", "event_id_cnty"]].nunique()
1128
All the values in the ID columns (`data_id` and `event_id_cnty`) are unique. We can drop these columns, as they would not add value to our analysis.
# Remove both identifier columns from the frame in a single in-place call
id_columns = ["data_id", "event_id_cnty"]
df.drop(columns=id_columns, inplace=True)

# Column names, non-null counts and dtypes of what remains
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1128 entries, 37 to 21509 Data columns (total 29 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 iso 1128 non-null int64 1 event_id_no_cnty 1128 non-null int64 2 event_date 1128 non-null datetime64[ns] 3 year 1128 non-null int64 4 time_precision 1128 non-null int64 5 event_type 1128 non-null object 6 sub_event_type 1128 non-null object 7 actor1 1128 non-null object 8 assoc_actor_1 235 non-null object 9 inter1 1128 non-null int64 10 actor2 1128 non-null object 11 assoc_actor_2 816 non-null object 12 inter2 1128 non-null int64 13 interaction 1128 non-null int64 14 region 1128 non-null object 15 country 1128 non-null object 16 admin1 1128 non-null object 17 admin2 1128 non-null object 18 admin3 1128 non-null object 19 location 1128 non-null object 20 latitude 1128 non-null float64 21 longitude 1128 non-null float64 22 geo_precision 1128 non-null int64 23 source 1128 non-null object 24 source_scale 1128 non-null object 25 notes 1128 non-null object 26 fatalities 1128 non-null int64 27 timestamp 1128 non-null int64 28 iso3 1128 non-null object dtypes: datetime64[ns](1), float64(2), int64(10), object(16) memory usage: 264.4+ KB
# Names of every column stored with the object dtype (treated here as categorical)
cat_col = df.select_dtypes(include="object").columns.tolist()

# Print the absolute frequency of each distinct value, one column at a time,
# with a dashed rule after every column
separator = "-" * 50
for name in cat_col:
    counts = df[name].value_counts()
    print(counts)
    print(separator)
Violence against civilians 1128
Name: event_type, dtype: int64
--------------------------------------------------
Attack 767
Abduction/forced disappearance 357
Sexual violence 4
Name: sub_event_type, dtype: int64
--------------------------------------------------
Unidentified Armed Group (Mali) 226
JNIM: Group for Support of Islam and Muslims 226
Katiba Macina 155
Military Forces of Mali (2021-) 113
Islamic State (Sahel) 108
Dozo Communal Militia (Mali) 75
Dan Na Ambassagou 69
Islamic State (West Africa) - Greater Sahara Faction 62
Military Forces of Mali (2020-2021) 22
MSA: Movement for Azawad Salvation 14
Wagner Group 13
Katiba Serma 5
Police Forces of Mali (2021-) Gendarmerie 4
Police Forces of Mali (2021-) 3
Soninke Ethnic Militia (Mali) 3
Katiba Gourma 3
GATIA: Imghad Tuareg and Allies Self-Defense Group 3
JNIM: Group for Support of Islam and Muslims and/or Islamic State (West Africa) - Greater Sahara Faction 2
AQIM: Al Qaeda in the Islamic Maghreb 2
Military Forces of France (2017-) 2
MINUSMA: United Nations Multidimensional Integrated Stabilization Mission in Mali (2021-) 2
Ansar Dine 1
Unidentified Communal Militia (Mali) 1
JNIM: Group for Support of Islam and Muslims and/or Islamic State (Sahel) 1
Bambara Ethnic Militia (Mali) 1
MAA: Arab Movement of the Azawad (Platform) 1
FLN: Front for the Liberation of the Northern Regions 1
Unidentified Armed Group (Chad) 1
Unidentified Armed Group (International) 1
Fulani Ethnic Militia (Mali) 1
Private Security Forces (China) 1
Dawsahak Ethnic Militia (Mali) 1
Gandakoy Ethnic Militia (Mali) 1
Military Forces of Niger (2021-) 1
Ganda Izo 1
Military Forces of Chad (1990-2021) 1
Military Forces of Chad (2021-) 1
Name: actor1, dtype: int64
--------------------------------------------------
JNIM: Group for Support of Islam and Muslims 166
Wagner Group 35
Wagner Group; Dan Na Ambassagou 9
Military Forces of Mali (2021-) 7
Dan Na Ambassagou 4
Dozo Communal Militia (Mali) 3
GATIA: Imghad Tuareg and Allies Self-Defense Group 3
G5S: G5 Sahel Force (2017-) 2
Wagner Group; Dozo Communal Militia (Mali) 1
CMFPR1: Coordination of Patriotic Movements and Forces for Resistance (Plateforme) 1
Military Forces of Mali (2021-); Dozo Communal Militia (Mali) 1
MSA: Movement for Azawad Salvation 1
Pastoralists (Mali) 1
MINUSMA: United Nations Multidimensional Integrated Stabilization Mission in Mali (2020-2021) 1
Name: assoc_actor_1, dtype: int64
--------------------------------------------------
Civilians (Mali) 1097
Civilians (Niger) 10
Civilians (Burkina Faso) 5
Civilians (Mauritania) 4
Civilians (Nigeria) 2
Civilians (Algeria) 2
Civilians (Germany) 1
Civilians (Sudan) 1
Civilians (Thailand) 1
Civilians (Italy) 1
Civilians (Morocco) 1
Civilians (China) 1
Civilians (Togo) 1
Civilians (France) 1
Name: actor2, dtype: int64
--------------------------------------------------
Fulani Ethnic Group (Mali); Pastoralists (Mali) 122
Labour Group (Mali) 122
Tuareg Ethnic Group (Mali) 35
Dogon Ethnic Group (Mali) 30
Dogon Ethnic Group (Mali); Farmers (Mali) 25
...
Dawsahak Ethnic Group (Mali); Ganda Izo 1
Labour Group (Mali); UN: United Nations 1
CRM: Malian Red Cross; Aid Workers (Mali) 1
Ibi Communal Group (Mali); Dogon Ethnic Group (Mali) 1
Dogon Ethnic Group (Mali); Women (Mali); Farmers (Mali) 1
Name: assoc_actor_2, Length: 218, dtype: int64
--------------------------------------------------
Western Africa 1128
Name: region, dtype: int64
--------------------------------------------------
Mali 1128
Name: country, dtype: int64
--------------------------------------------------
Mopti 458
Gao 227
Segou 153
Menaka 96
Tombouctou 92
Sikasso 33
Koulikoro 26
Kayes 22
Kidal 11
Bamako 10
Name: admin1, dtype: int64
--------------------------------------------------
Ansongo 123
Niono 95
Douentza 93
Gao 91
Bandiagara 91
Djenne 71
Mopti 69
Bankass 62
Menaka 60
Koro 40
Tombouctou 28
Gourma-Rharous 27
Macina 25
Yorosso 21
Anderamboukane 21
Goundam 19
Tenenkou 17
Youwarou 15
Segou 15
Nara 15
Bourem 13
Tominian 12
Niafunke 11
Bamako 10
Inekar 10
Dire 7
Bafoulabe 7
Diema 7
Sikasso 6
San 6
Kidal 6
Tidermene 5
Kolokani 4
Kita 3
Koutiala 3
Abeibara 3
Banamba 3
Kati 3
Kenieba 3
Kayes 2
Tessalit 2
Dioila 1
Bougouni 1
Yanfolila 1
Kadiolo 1
Name: admin2, dtype: int64
--------------------------------------------------
Menaka 60
Gao 42
Ouattagouna 31
Bara Sara 23
Doucoumbo 23
..
Pelengana 1
Diougani 1
Kava 1
Kassaro 1
Koporokendie Na 1
Name: admin3, Length: 215, dtype: int64
--------------------------------------------------
Gao 41
Menaka 32
Timbuktu 15
Parou 14
Douentza 13
..
Tiogou 1
Ogossagou-Dogon 1
Deguena 1
Ouro-Nema 1
Arabebe 1
Name: location, Length: 613, dtype: int64
--------------------------------------------------
Undisclosed Source 218
Signal 98
Whatsapp 76
Twitter 55
Studio Tamani 40
...
Whatsapp; Undisclosed Source; Signal 1
Undisclosed Source; MSA Azawad 1
Signal; Koro Kibaru; Twitter 1
Facebook; Radio Guintan; Undisclosed Source; FAMAMali 1
Koro Kibaru; Mali Jet; Le Pays Dogon; AMAP; FAMAMali 1
Name: source, Length: 354, dtype: int64
--------------------------------------------------
Local partner-Other 436
New media 344
New media-National 108
National 102
New media-Subnational 52
Subnational 19
Other-New media 15
Local partner-New media 12
Other 11
New media-International 9
International 6
Other-National 5
National-International 3
Subnational-National 2
Regional 1
New media-Regional 1
Other-International 1
National-Regional 1
Name: source_scale, dtype: int64
--------------------------------------------------
On 1 March 2021, the Malian army entered the villages of Tambarga and Guittiram (Douentza, Mopti). The soldiers reportedly killed two Fulani male civilians, and burned houses, granaries, and livestock. Fatalities are split across two events. 2
On 16 December 2022, JNIM militants killed two people and abducted ten others from the Dawsahak community in the village of Taghatert (Menaka, Menaka). JNIM claimed responsibility and said it killed 5 IS Sahel militants in Inekar, likely referring to Inekar Ouest near Taghatert. 2
On 15 December 2022, JNIM militants abducted the village chief and his son in the village of Soye (Mopti, Mopti). The abducted were released on 30 December. 2
On 4 May 2021, presumed Katiba Macina (JNIM) militants simultaneously attacked the villages of Ndobougou and Kounti-Marka (Djenne, Mopti). The militants killed three people and wounded others. Fatalities are split across two events. 2
On 7 September 2021, presumed Dana Ambassagou militiamen abducted a Fulani man in the town of Sevare (Mopti, Mopti). 1
..
On 11 June 2022, JNIM militants attacked a toll booth in Sienso (San, Segou). The militants killed a toll booth attendant and set ablaze buildings. JNIM claimed responsibility in a statement. 1
On 11 June 2022, FAMa and Wagner accompanied by Dan Na Ambassagou militiamen attacked the village of Yalema (Bankass, Mopti). Three people were killed, jewelry stolen, and a water tower sabotaged. 1
On 10 June 2022, presumed MSA militiamen aboard a pickup truck abducted an Arab merchant in the town of Menaka (Menaka, Menaka). 1
On 10 June 2022, IS Sahel militants killed two pastoralist men in the village of Inarinkadane (Menaka, Menaka). 1
On 2 January 2021, unknown gunmen abducted an imam in the village of Arabebe (Niafunke, Tombouctou). The imam was released on 22 January. 1
Name: notes, Length: 1124, dtype: int64
--------------------------------------------------
MLI 1128
Name: iso3, dtype: int64
--------------------------------------------------
# Summary statistics from describe(), transposed so each described column is a row
df.describe().transpose()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| iso | 1128.0 | 4.660000e+02 | 0.000000e+00 | 4.660000e+02 | 4.660000e+02 | 4.660000e+02 | 4.660000e+02 | 4.660000e+02 |
| event_id_no_cnty | 1128.0 | 6.298388e+03 | 9.696360e+02 | 4.627000e+03 | 5.434750e+03 | 6.359500e+03 | 7.107250e+03 | 8.288000e+03 |
| year | 1128.0 | 2.021559e+03 | 4.967850e-01 | 2.021000e+03 | 2.021000e+03 | 2.022000e+03 | 2.022000e+03 | 2.022000e+03 |
| time_precision | 1128.0 | 1.057624e+00 | 2.406263e-01 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 3.000000e+00 |
| inter1 | 1128.0 | 2.474291e+00 | 1.153389e+00 | 1.000000e+00 | 2.000000e+00 | 2.000000e+00 | 3.000000e+00 | 8.000000e+00 |
| inter2 | 1128.0 | 7.000000e+00 | 0.000000e+00 | 7.000000e+00 | 7.000000e+00 | 7.000000e+00 | 7.000000e+00 | 7.000000e+00 |
| interaction | 1128.0 | 3.157535e+01 | 1.076932e+01 | 1.700000e+01 | 2.700000e+01 | 2.700000e+01 | 3.700000e+01 | 7.800000e+01 |
| latitude | 1128.0 | 1.492655e+01 | 1.174712e+00 | 1.089470e+01 | 1.415333e+01 | 1.482675e+01 | 1.581960e+01 | 2.267440e+01 |
| longitude | 1128.0 | -2.781737e+00 | 2.920067e+00 | -1.205490e+01 | -4.511325e+00 | -3.570600e+00 | -4.470000e-02 | 3.709500e+00 |
| geo_precision | 1128.0 | 1.332447e+00 | 4.731781e-01 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 2.000000e+00 | 3.000000e+00 |
| fatalities | 1128.0 | 2.317376e+00 | 8.722294e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 1.300000e+02 |
| timestamp | 1128.0 | 1.647062e+09 | 1.749793e+07 | 1.613420e+09 | 1.630983e+09 | 1.649700e+09 | 1.663010e+09 | 1.678155e+09 |
# Columns whose distribution is inspected below
numeric_columns = [
    'iso', 'year', 'time_precision', 'inter1', 'inter2', 'interaction',
    'latitude', 'longitude', 'geo_precision', 'fatalities', 'timestamp',
]

# For each column: print its name and skewness, then draw a histogram (left)
# and a box plot (right) side by side in one figure
for name in numeric_columns:
    series = df[name]
    print(name)
    print('Skew :', round(series.skew(), 2))

    plt.figure(figsize=(15, 4))
    hist_ax = plt.subplot(1, 2, 1)
    box_ax = plt.subplot(1, 2, 2)

    series.hist(bins=10, grid=False, ax=hist_ax)
    hist_ax.set_ylabel('count')
    sns.boxplot(x=series, ax=box_ax)
    plt.show()
iso Skew : 0
year Skew : -0.24
time_precision Skew : 4.18
inter1 Skew : 2.09
inter2 Skew : 0
interaction Skew : 1.57
latitude Skew : 0.38
longitude Skew : 0.17
geo_precision Skew : 0.74
fatalities Skew : 9.94
timestamp Skew : -0.07
import plotly.express as px
# Two longitude/latitude scatter plots coloured by sub-event type, with the
# event date shown on hover: first without a size encoding, then with marker
# size taken from the `fatalities` column.
# NOTE(review): rows with zero fatalities presumably get zero-size markers in
# the second plot - confirm they are still visible.
for extra in ({}, {'size': 'fatalities'}):
    fig = px.scatter(
        df,
        x='longitude',
        y='latitude',
        color='sub_event_type',
        hover_data=['event_date'],
        **extra,
    )
    fig.show()
# Ten actors with the highest event counts and the highest fatality totals
by_actor = df.groupby('actor1')
events_by_actor = by_actor['event_type'].count().sort_values(ascending=False).nlargest(10)
fatalities_by_actor = by_actor['fatalities'].sum().sort_values(ascending=False).nlargest(10)

# One horizontal bar chart per ranking, stacked vertically
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(6, 8))
panels = [
    (ax1, events_by_actor, 'blue', 'Actors with the most events', 'Number of events'),
    (ax2, fatalities_by_actor, 'red', 'Actors with the most fatalities', 'Number of fatalities'),
]
for axis, ranking, colour, title, xlabel in panels:
    sns.barplot(x=ranking.values, y=ranking.index, color=colour, ax=axis)
    axis.set_title(title)
    axis.set_xlabel(xlabel)

# Extra vertical room so the lower title does not collide with the upper axis
fig.subplots_adjust(hspace=0.3)
plt.show()
# Rank the `admin1` values by number of events and by total fatalities.
# `admin1` holds first-level administrative divisions (Mopti, Gao, Segou, ...),
# not cities, so the chart titles say "Regions".
# The *_by_city variable names are kept unchanged in case later cells use them.
events_by_city = df.groupby('admin1')['event_type'].count().sort_values(ascending=False).nlargest(10)
fatalities_by_city = df.groupby('admin1')['fatalities'].sum().sort_values(ascending=False).nlargest(10)

# Two stacked horizontal bar charts: event counts (top), fatalities (bottom)
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(6, 8))
sns.barplot(x=events_by_city.values, y=events_by_city.index, color='blue', ax=ax1)
sns.barplot(x=fatalities_by_city.values, y=fatalities_by_city.index, color='red', ax=ax2)
ax1.set_title('Regions (admin1) with the most events')
ax2.set_title('Regions (admin1) with the most fatalities')
ax1.set_xlabel('Number of events')
ax2.set_xlabel('Number of fatalities')

# Extra vertical room so the lower title does not collide with the upper axis
fig.subplots_adjust(hspace=0.3)
plt.show()